今回だけでggplot2を習得するのは難しいので本当に紹介程度です。 コピペでも全然OKです。 岩嵜先生のとても参考になる資料がよくまとまっています。 資料1 資料2
資料はネット上に日本語、英語問わずたくさんある。
データ可視化は重要。 Rは可視化に強い。特に人気の可視化パッケージであるggplot2を中心に学ぶ。
Pythonだとmatplotlibやseabornが人気のよう。最近のいけてる論文のfigは結構ggplot2で作成されている。 ggplot2の説明は難しいのでとにかくコードを実行してたくさん図を作っていきましょう。
イメージはレイヤを重ねていく感じ
基本
ggplot() # 使うデータを指定
aes() # x軸、y軸など指定
geom_() # 棒グラフか、散布図かヒストグラムなのかなど
theme_() # 背景や軸の見栄え
library(ggplot2)
head(diamonds) # ダイアモンドのデータ
## # A tibble: 6 x 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 4 0.290 Premium I VS2 62.4 58 334 4.2 4.23 2.63
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48
head(mpg) # 自動車の燃費のデータ
## # A tibble: 6 x 11
## manufacturer model displ year cyl trans drv cty hwy fl class
## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr>
## 1 audi a4 1.8 1999 4 auto(… f 18 29 p comp…
## 2 audi a4 1.8 1999 4 manua… f 21 29 p comp…
## 3 audi a4 2 2008 4 manua… f 20 31 p comp…
## 4 audi a4 2 2008 4 auto(… f 21 30 p comp…
## 5 audi a4 2.8 1999 6 auto(… f 16 26 p comp…
## 6 audi a4 2.8 1999 6 manua… f 18 26 p comp…
# ?mpgでこのデータセットの詳細がわかる
# displは排気量, hwyは燃費
ggplot(data = mpg) +
geom_point(mapping = aes(x = displ, y = hwy))
m1 <- ggplot(data = mpg) +
geom_point(mapping = aes(x = displ, y = hwy, colour = class))
ggplot(data = mpg) +
geom_point(mapping = aes(x = displ, y = hwy, shape = class))
## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have 7.
## Consider specifying shapes manually if you must have them.
## Warning: Removed 62 rows containing missing values (geom_point).
ggplot(data = mpg) +
geom_point(mapping = aes(x = displ, y = hwy, colour = "red"))
ggplot(data = mpg) +
geom_point(mapping = aes(x = displ, y = hwy)) +
facet_wrap(~class)
m2 <- ggplot(data = mpg) +
geom_point(mapping = aes(x = displ, y = hwy, colour = class)) +
facet_wrap(~class)
m1
m2
ggplot(data = mpg) +
geom_point(mapping = aes(x = displ, y = hwy))
ggplot(data = mpg) +
geom_smooth(mapping = aes(x = displ, y = hwy))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggplot(data = mpg) +
geom_smooth(mapping = aes(x = displ, y = hwy, colour = drv))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
geom_point() +
geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
geom_point(aes(colour = class)) +
geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggplot(data = diamonds, mapping = aes(x = cut)) +
geom_bar()
ggplot(data = diamonds, mapping = aes(x = cut, fill = cut)) +
geom_bar()
ggplot(data = diamonds, mapping = aes(x = cut, y = price)) +
geom_boxplot()
a <- ggplot(diamonds, aes(cut, price, fill = cut)) +
geom_violin(trim = FALSE) +
geom_boxplot(width = 0.08, fill = "white", outlier.size = FALSE) +
theme_light()
a + coord_flip()
ggsave(a + coord_flip(), filename = "violinplot.jpg")
## Saving 7 x 5 in image
画像の保存方法 GUIでやるかggsave関数
getwd()
## [1] "/Users/juyoutai/Desktop/R/2019_handson_ggplot2"
list.files()
## [1] "2019_handson_ggplot2.Rproj" "20190907_handson.html"
## [3] "20190907_handson.Rmd" "data2_corrected.csv"
## [5] "expression_annt.txt" "ggplot2_cache"
## [7] "ggplot2_files" "ggplot2_script.R"
## [9] "ggplot2.html" "ggplot2.Rmd"
## [11] "ggpointdensity.html" "ggpointdensity.Rmd"
## [13] "heatmap_2.txt" "hex_bin_ggseqlogo.Rmd"
## [15] "iris_2.csv" "iris.csv"
## [17] "iris.tsv" "limma_result.txt"
## [19] "merged_annot.txt" "violinplot.jpg"
data <- readr::read_csv("data2_corrected.csv")
## Parsed with column specification:
## cols(
## `Sample ID` = col_double(),
## Age = col_double(),
## Gender = col_character(),
## Height = col_double(),
## Weight = col_double(),
## BMI = col_double(),
## Food_A = col_double()
## )
head(data)
## # A tibble: 6 x 7
## `Sample ID` Age Gender Height Weight BMI Food_A
## <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 1 36 Female 162 52 19.8 18
## 2 2 13 Female 160 43 16.8 8
## 3 3 20 Female 153 46 19.7 37
## 4 4 24 Male 167 54 19.4 57
## 5 5 22 Female 153 43 18.4 14
## 6 6 48 Male 168 60 21.3 35
class(data)
## [1] "spec_tbl_df" "tbl_df" "tbl" "data.frame"
str(data)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 80 obs. of 7 variables:
## $ Sample ID: num 1 2 3 4 5 6 7 8 9 10 ...
## $ Age : num 36 13 20 24 22 48 46 49 26 50 ...
## $ Gender : chr "Female" "Female" "Female" "Male" ...
## $ Height : num 162 160 153 167 153 168 153 157 159 153 ...
## $ Weight : num 52 43 46 54 43 60 44 44 48 42 ...
## $ BMI : num 19.8 16.8 19.7 19.4 18.4 21.3 18.8 17.9 19 17.9 ...
## $ Food_A : num 18 8 37 57 14 35 88 100 37 7.2 ...
## - attr(*, "spec")=
## .. cols(
## .. `Sample ID` = col_double(),
## .. Age = col_double(),
## .. Gender = col_character(),
## .. Height = col_double(),
## .. Weight = col_double(),
## .. BMI = col_double(),
## .. Food_A = col_double()
## .. )
summary(data)
## Sample ID Age Gender Height
## Min. : 1.00 Min. :10.00 Length:80 Min. :129.0
## 1st Qu.:20.75 1st Qu.:25.75 Class :character 1st Qu.:153.0
## Median :40.50 Median :37.00 Mode :character Median :158.5
## Mean :40.50 Mean :35.60 Mean :158.6
## 3rd Qu.:60.25 3rd Qu.:46.00 3rd Qu.:165.2
## Max. :80.00 Max. :60.00 Max. :177.0
## Weight BMI Food_A
## Min. :30.00 Min. :16.00 Min. : 7.20
## 1st Qu.:44.75 1st Qu.:19.00 1st Qu.: 37.75
## Median :50.00 Median :19.80 Median : 59.00
## Mean :52.08 Mean :20.62 Mean : 71.27
## 3rd Qu.:56.25 3rd Qu.:21.60 3rd Qu.: 93.25
## Max. :80.00 Max. :27.10 Max. :360.00
summary(data$Food_A)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 7.20 37.75 59.00 71.27 93.25 360.00
mean(data$Food_A) # 平均
## [1] 71.265
sd(data$Food_A) # 標準偏差
## [1] 52.98051
var(data$Food_A) # 分散
## [1] 2806.934
class(data$Gender) # Rではデータの型が重要となる. Genderはcharacterなので、これをfactorに変換する必要がある
## [1] "character"
data$Gender <- factor(data$Gender, #factorに変更
levels = c("Male", "Female")) # Maleを1, Femaleを2に指定
class(data$Gender)
## [1] "factor"
summary(data)
## Sample ID Age Gender Height
## Min. : 1.00 Min. :10.00 Male :35 Min. :129.0
## 1st Qu.:20.75 1st Qu.:25.75 Female:45 1st Qu.:153.0
## Median :40.50 Median :37.00 Median :158.5
## Mean :40.50 Mean :35.60 Mean :158.6
## 3rd Qu.:60.25 3rd Qu.:46.00 3rd Qu.:165.2
## Max. :80.00 Max. :60.00 Max. :177.0
## Weight BMI Food_A
## Min. :30.00 Min. :16.00 Min. : 7.20
## 1st Qu.:44.75 1st Qu.:19.00 1st Qu.: 37.75
## Median :50.00 Median :19.80 Median : 59.00
## Mean :52.08 Mean :20.62 Mean : 71.27
## 3rd Qu.:56.25 3rd Qu.:21.60 3rd Qu.: 93.25
## Max. :80.00 Max. :27.10 Max. :360.00
summary(subset(data, Gender == 'Female')) #女性のみを抽出
## Sample ID Age Gender Height
## Min. : 1.0 Min. :12.00 Male : 0 Min. :140.0
## 1st Qu.:17.0 1st Qu.:24.00 Female:45 1st Qu.:150.0
## Median :35.0 Median :33.00 Median :155.0
## Mean :38.2 Mean :33.38 Mean :154.7
## 3rd Qu.:58.0 3rd Qu.:45.00 3rd Qu.:159.0
## Max. :79.0 Max. :53.00 Max. :172.0
## Weight BMI Food_A
## Min. :36.00 Min. :16.0 Min. : 7.20
## 1st Qu.:43.00 1st Qu.:18.9 1st Qu.: 28.00
## Median :47.00 Median :19.8 Median : 43.00
## Mean :48.29 Mean :20.2 Mean : 61.96
## 3rd Qu.:51.00 3rd Qu.:21.1 3rd Qu.: 71.00
## Max. :66.00 Max. :26.4 Max. :360.00
p <- ggplot(data, aes(Age, Food_A)) + # 解析対象の列を指定
geom_point() # 散布図なのでpointで作図することを指定
p
p + geom_point(aes(color = Gender)) # デフォルト
p + geom_point(aes(color = Gender)) +
scale_colour_manual(values = c(Male = "black", Female = "red")) # 色を指定した
p + geom_point(size = 3, aes(shape = Gender)) # 丸と三角
p + stat_smooth(method = "lm") # 回帰直線を追加
summary(data)
## Sample ID Age Gender Height
## Min. : 1.00 Min. :10.00 Male :35 Min. :129.0
## 1st Qu.:20.75 1st Qu.:25.75 Female:45 1st Qu.:153.0
## Median :40.50 Median :37.00 Median :158.5
## Mean :40.50 Mean :35.60 Mean :158.6
## 3rd Qu.:60.25 3rd Qu.:46.00 3rd Qu.:165.2
## Max. :80.00 Max. :60.00 Max. :177.0
## Weight BMI Food_A
## Min. :30.00 Min. :16.00 Min. : 7.20
## 1st Qu.:44.75 1st Qu.:19.00 1st Qu.: 37.75
## Median :50.00 Median :19.80 Median : 59.00
## Mean :52.08 Mean :20.62 Mean : 71.27
## 3rd Qu.:56.25 3rd Qu.:21.60 3rd Qu.: 93.25
## Max. :80.00 Max. :27.10 Max. :360.00
p <- ggplot(data, # データを指定
aes(Height, Weight)) + # x, y軸をそれぞれ指定
geom_point(aes(color = Gender)) # 性差について色分け
p
p <- ggplot(data, aes(x = Gender, y = BMI))
p + geom_boxplot() # 箱ひげ図
# 身長の分布
ggplot(data, aes(x = Height)) + geom_density()
ggplot(data, aes(x = Height)) + geom_density(aes(colour = Gender))
ggplot(data, aes(x = Height)) + geom_histogram(fill = "white", colour = "black") +
facet_grid(Gender ~ .)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# お遊び〜Food_Aと体重の関係は?
ggplot(data, aes(x = Weight, y = Food_A)) + geom_point()
# 正規性が無いのでSpearmanの相関係数
ggplot(data, aes(x = Weight)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(data, aes(x = Food_A)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
cor.test(data$Food_A, #xを指定
data$Weight, #yを指定
method="spearman") #Spearman's correlationを指定
## Warning in cor.test.default(data$Food_A, data$Weight, method = "spearman"):
## Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: data$Food_A and data$Weight
## S = 62755, p-value = 0.01776
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.2644733
# p値と相関係数は?
# ここまで
ggplot(data, aes(x = Food_A)) + geom_histogram(fill = "white", colour = "black") +
facet_grid(Gender ~ .)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
参照 視覚多様性の時代
# viridis theme
data <- data.frame(x = rnorm(10000), y = rnorm(10000))
# viridis
g1 <- ggplot(data, aes(x = x, y = y)) + geom_hex() + coord_fixed() +
scale_fill_viridis_c() +
theme_bw()
# magma
g2 <- ggplot(data, aes(x, y)) + geom_hex() + coord_fixed() +
scale_fill_viridis_c(option = "magma") +
theme_bw()
# inferno
g3 <- ggplot(data, aes(x, y)) + geom_hex() + coord_fixed() +
scale_fill_viridis_c(option = "inferno") +
theme_bw()
# plasma
g4 <- ggplot(data, aes(x, y)) + geom_hex() + coord_fixed() +
scale_fill_viridis_c(option = "plasma") +
theme_bw()
# cividis
g5 <- ggplot(data, aes(x, y)) + geom_hex() + coord_fixed() +
scale_fill_viridis_c(option = "cividis") +
theme_bw()
# default colour of ggplot2
g6 <- ggplot(data, aes(x, y)) + geom_hex() + coord_fixed() + theme_bw()
gridExtra::grid.arrange(g1, g2, g3, g4, g5, g6)
gplotsパッケージのheatmap.2関数を使ってヒートマップ作成
heatmap_2 <- read.table("heatmap_2.txt", header = T, sep = "\t", row.names = 1)
head(heatmap_2)
## GSM995221_AD01M001.CEL.gz GSM995222_AD01M002.CEL.gz
## Il1r2 9.709961 9.499286
## Il1rl1 10.869457 10.595727
## Il18rap 8.661484 8.596499
## Icos 11.556232 11.625647
## Fam129a 10.466855 10.324924
## Rgs16 10.649960 10.630798
## GSM995223_AD01M003.CEL.gz GSM995224_AD01M004.CEL.gz
## Il1r2 8.653889 8.677315
## Il1rl1 9.412765 9.279133
## Il18rap 7.772801 7.761333
## Icos 11.010891 10.862727
## Fam129a 9.845083 9.640511
## Rgs16 10.320202 10.154570
## GSM995225_AD01M005.CEL.gz GSM995226_AD01M006.CEL.gz
## Il1r2 6.766613 7.282546
## Il1rl1 6.696681 6.555741
## Il18rap 6.975710 6.711794
## Icos 8.990778 9.695695
## Fam129a 8.449838 9.240955
## Rgs16 9.282057 9.599733
## GSM995227_AD01M007.CEL.gz GSM995228_AD01M008.CEL.gz
## Il1r2 7.030207 7.114095
## Il1rl1 6.561950 6.557071
## Il18rap 6.987141 6.770077
## Icos 9.515691 9.177716
## Fam129a 8.827236 8.570377
## Rgs16 9.579403 9.278177
colnames(heatmap_2) <- c("1", "2", "3", "4", "5", "6", "7", "8")
head(heatmap_2)
## 1 2 3 4 5 6 7
## Il1r2 9.709961 9.499286 8.653889 8.677315 6.766613 7.282546 7.030207
## Il1rl1 10.869457 10.595727 9.412765 9.279133 6.696681 6.555741 6.561950
## Il18rap 8.661484 8.596499 7.772801 7.761333 6.975710 6.711794 6.987141
## Icos 11.556232 11.625647 11.010891 10.862727 8.990778 9.695695 9.515691
## Fam129a 10.466855 10.324924 9.845083 9.640511 8.449838 9.240955 8.827236
## Rgs16 10.649960 10.630798 10.320202 10.154570 9.282057 9.599733 9.579403
## 8
## Il1r2 7.114095
## Il1rl1 6.557071
## Il18rap 6.770077
## Icos 9.177716
## Fam129a 8.570377
## Rgs16 9.278177
class(heatmap_2)
## [1] "data.frame"
library(gplots)
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
library(viridis)
## Loading required package: viridisLite
# png("heatmap.png", width = 400, height = 800)
heatmap.2(as.matrix(heatmap_2), col = bluered(256), trace = "none", cexCol = 0.9, cexRow = 0.5, labRow = F)
heatmap.2(as.matrix(heatmap_2), col = bluered(256), trace = "none", cexCol = 0.9, cexRow = 0.5, scale = "row", labRow = F) # scaling
# Seurat colour
heatmap.2(as.matrix(heatmap_2), col = colorRampPalette(c("#FF00FF", "#000000", "#FFFF00")), trace = "none", cexCol = 0.9, cexRow = 0.5, scale = "row", labRow = F)
heatmap.2(as.matrix(heatmap_2), col = viridis, trace = "none", cexCol = 0.9, cexRow = 0.5, labRow = F)
heatmap.2(as.matrix(heatmap_2), col = viridis, trace = "none", cexCol = 0.9, cexRow = 0.5, scale = "row", labRow = F)
heatmap.2(as.matrix(heatmap_2), col = viridis, trace = "none", cexCol = 0.9, cexRow = 0.5, scale = "col", labRow = F)
heatmap.2(as.matrix(heatmap_2), col = magma, trace = "none", cexCol = 0.9, cexRow = 0.5, labRow = F)
# scaling
heatmap.2(as.matrix(heatmap_2), col = magma, trace = "none", cexCol = 0.9, cexRow = 0.5, scale = "row", labRow = F)
heatmap.2(as.matrix(heatmap_2), col = plasma, trace = "none", cexCol = 0.9, cexRow = 0.5, scale = "row", labRow = F)
heatmap.2(as.matrix(heatmap_2), col = inferno, trace = "none", cexCol = 0.9, cexRow = 0.5, scale = "row", labRow = F)
heatmap.2(as.matrix(heatmap_2), col = cividis, trace = "none", cexCol = 0.9, cexRow = 0.5, scale = "row", labRow = F)
heatmap.2(as.matrix(heatmap_2), col = viridis, trace = "none", cexCol = 0.9, cexRow = 0.5, scale = "row", labRow = F)
同じような作業を繰り返すなら簡単なプログラミングで効率的に この程度ならfor構文で構わない map関数でもできると思う(勉強不足)
for (i in c(viridis, magma, plasma, inferno, cividis)) {
heatmap.2(as.matrix(heatmap_2), col = i, trace = "none", cexCol = 0.9, cexRow = 0.5, scale = "row")
}
complexheatmapを用いると複雑なヒートマップ描画できるが今回は割愛
地図かける geom_sf()
動画も作れる gganimate
MAプロットとボルケーノプロット マイクロアレイやRNA-seqの発現変動遺伝子を示す際によく用いられる
### 少しはバイオインフォマティクス的なプロットも
# MA-plotやvolcano plotを作図してみる
merged <- readr::read_delim("merged_annot.txt", delim = " ")
## Parsed with column specification:
## cols(
## PROBEID = col_character(),
## logFC = col_double(),
## AveExpr = col_double(),
## t = col_double(),
## P.Value = col_double(),
## adj.P.Val = col_double(),
## B = col_double(),
## SYMBOL = col_character(),
## GENENAME = col_character()
## )
str(merged)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 15297 obs. of 9 variables:
## $ PROBEID : chr "ILMN_1763207" "ILMN_1739001" "ILMN_2384056" "ILMN_2393765" ...
## $ logFC : num -5.87 -5.57 -5.48 -5.45 -5.45 ...
## $ AveExpr : num 8.83 7.61 8.66 12.69 7.34 ...
## $ t : num -6.64 -5.96 -5.3 -5.89 -10.46 ...
## $ P.Value : num 2.17e-04 4.36e-04 8.98e-04 4.71e-04 9.79e-06 ...
## $ adj.P.Val: num 0.0631 0.0845 0.1083 0.0888 0.0231 ...
## $ B : num 1.155 0.506 -0.182 0.435 3.774 ...
## $ SYMBOL : chr "BATF3" "TACSTD2" "GPER1" "IGLL1" ...
## $ GENENAME : chr "basic leucine zipper ATF-like transcription factor 3" "tumor associated calcium signal transducer 2" "G protein-coupled estrogen receptor 1" "immunoglobulin lambda like polypeptide 1" ...
## - attr(*, "spec")=
## .. cols(
## .. PROBEID = col_character(),
## .. logFC = col_double(),
## .. AveExpr = col_double(),
## .. t = col_double(),
## .. P.Value = col_double(),
## .. adj.P.Val = col_double(),
## .. B = col_double(),
## .. SYMBOL = col_character(),
## .. GENENAME = col_character()
## .. )
head(merged)
## # A tibble: 6 x 9
## PROBEID logFC AveExpr t P.Value adj.P.Val B SYMBOL GENENAME
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 ILMN_17… -5.87 8.83 -6.64 2.17e-4 0.0631 1.16 BATF3 basic leuc…
## 2 ILMN_17… -5.57 7.61 -5.96 4.36e-4 0.0845 0.506 TACST… tumor asso…
## 3 ILMN_23… -5.48 8.66 -5.30 8.98e-4 0.108 -0.182 GPER1 G protein-…
## 4 ILMN_23… -5.45 12.7 -5.89 4.71e-4 0.0888 0.435 IGLL1 immunoglob…
## 5 ILMN_16… -5.45 7.34 -10.5 9.79e-6 0.0231 3.77 IDO1 indoleamin…
## 6 ILMN_16… -5.29 7.53 -8.54 4.03e-5 0.0332 2.64 CDC20 cell divis…
DE_sig <- dplyr::filter(merged, adj.P.Val <= 0.05, SYMBOL != "NA")
# FDRが0.05以下の遺伝子のみ抜き出す
ma <- ggplot(data = merged, aes(x = AveExpr, y = logFC)) +
geom_point(size = 0.1) +
geom_point(data = DE_sig, colour = "red", size = 0.3) +
theme_classic()
ma + ggtitle("MA plot")
library(ggrepel)
ma + geom_text_repel(data = DE_sig, aes(label = SYMBOL))
vp <- ggplot(data = merged, aes(x = logFC, y = -log10(adj.P.Val))) +
geom_point(size = 0.1) +
geom_point(data = DE_sig, colour = "red", size = 0.3) +
theme_classic()
vp
vp + ggtitle("volcano plot of microarray")
vp + geom_text_repel(data = DE_sig, aes(label = SYMBOL))
ggseqlogoパッケージ使うとggplot2でsequence logoを描画できる。ggplot2の要領で図に色々と書き込める点も便利!時代はtidyverse!
library(MotifDb)
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter,
## Find, get, grep, grepl, intersect, is.unsorted, lapply, Map,
## mapply, match, mget, order, paste, pmax, pmax.int, pmin,
## pmin.int, Position, rank, rbind, Reduce, rownames, sapply,
## setdiff, sort, table, tapply, union, unique, unsplit, which,
## which.max, which.min
## Loading required package: S4Vectors
## Loading required package: stats4
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:gplots':
##
## space
## The following object is masked from 'package:base':
##
## expand.grid
## Loading required package: IRanges
## Warning: package 'IRanges' was built under R version 3.6.1
## Loading required package: Biostrings
## Loading required package: XVector
##
## Attaching package: 'Biostrings'
## The following object is masked from 'package:base':
##
## strsplit
## See system.file("LICENSE", package="MotifDb") for use restrictions.
library(seqLogo)
## Loading required package: grid
library(ggseqlogo)
# motif visualisation
PotentialIRF8 <- as.list(subset (MotifDb,
tolower (geneSymbol)=="irf8"))
# seqlogo
PotentialIRF8[[2]]
## 1 2 3 4 5 6
## A 0.1621622 0.45945946 0.13513514 0.54054054 0.97297297 0.91891892
## C 0.1621622 0.10810811 0.05405405 0.02702703 0.00000000 0.02702703
## G 0.5135135 0.35135135 0.75675676 0.43243243 0.02702703 0.02702703
## T 0.1621622 0.08108108 0.05405405 0.00000000 0.00000000 0.02702703
## 7 8 9 10 11 12
## A 0.2702703 0.1081081 0.00000000 0.94594595 0.89189189 0.86486486
## C 0.3783784 0.1081081 0.05405405 0.00000000 0.05405405 0.00000000
## G 0.3513514 0.1081081 0.91891892 0.05405405 0.05405405 0.08108108
## T 0.0000000 0.6756757 0.02702703 0.00000000 0.00000000 0.05405405
## 13 14 15
## A 0.1621622 0.05405405 0.3513514
## C 0.3513514 0.35135135 0.1351351
## G 0.3513514 0.10810811 0.4054054
## T 0.1351351 0.48648649 0.1081081
seqLogo(PotentialIRF8[[2]]) # mouse
seqLogo(PotentialIRF8[[4]]) # human
# ggseqlogo
ggseqlogo(PotentialIRF8[[2]])
ggseqlogo(PotentialIRF8[[4]])
p1 <- ggseqlogo(PotentialIRF8[[2]], method = "bits")
p2 <- ggseqlogo(PotentialIRF8[[2]], method = "prob")
gridExtra::grid.arrange(p1, p2)
p3 <- ggplot() + geom_logo(PotentialIRF8[[2]]) + theme_classic() +
scale_y_continuous(expand = c(0,0), limits = c(0, 2.0)) +
scale_x_continuous(expand = c(0,0), breaks = seq(1,15,1))
## Scale for 'x' is already present. Adding another scale for 'x', which
## will replace the existing scale.
gridExtra::grid.arrange(p1, p3)
# add annotation
ggplot() +
annotate('rect', xmin = 2.5, xmax = 6.5, ymin = -0.05, ymax = 1.9, alpha = .1, col='black', fill='yellow') +
annotate('rect', xmin = 8.5, xmax = 12.5, ymin = -0.05, ymax = 1.9, alpha = .1, col='black', fill='yellow') +
geom_logo(PotentialIRF8[[2]], stack_width = 0.90) +
annotate('text', x=4.5, y=2, label='IRF or ETS') +
annotate('text', x=10.5, y=2, label='IRF motif') +
theme_logo()
sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Mojave 10.14.4
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
##
## locale:
## [1] ja_JP.UTF-8/ja_JP.UTF-8/ja_JP.UTF-8/C/ja_JP.UTF-8/ja_JP.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggrepel_0.8.1 viridis_0.5.1 viridisLite_0.3.0 gplots_3.0.1.1
## [5] ggplot2_3.2.1
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.2 plyr_1.8.4 pillar_1.4.2
## [4] compiler_3.6.0 bitops_1.0-6 tools_3.6.0
## [7] zeallot_0.1.0 digest_0.6.20 evaluate_0.14
## [10] tibble_2.1.3 gtable_0.3.0 pkgconfig_2.0.2
## [13] rlang_0.4.0 cli_1.1.0 yaml_2.2.0
## [16] xfun_0.9 gridExtra_2.3 withr_2.1.2
## [19] dplyr_0.8.3 stringr_1.4.0 knitr_1.24
## [22] caTools_1.17.1.2 gtools_3.8.1 hms_0.5.1
## [25] vctrs_0.2.0 grid_3.6.0 tidyselect_0.2.5
## [28] glue_1.3.1 R6_2.4.0 fansi_0.4.0
## [31] rmarkdown_1.15 gdata_2.18.0 reshape2_1.4.3
## [34] readr_1.3.1 purrr_0.3.2 magrittr_1.5
## [37] scales_1.0.0 backports_1.1.4 htmltools_0.3.6
## [40] assertthat_0.2.1 colorspace_1.4-1 labeling_0.3
## [43] KernSmooth_2.23-15 utf8_1.1.4 stringi_1.4.3
## [46] lazyeval_0.2.2 munsell_0.5.0 crayon_1.3.4
date()
## [1] "Sat Sep 7 10:33:42 2019"